home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Tech Arsenal 1
/
Tech Arsenal (Arsenal Computer).ISO
/
tek-04
/
bipl.zip
/
PROCS.ZIP
/
NGRAMS.ICN
< prev
next >
Wrap
Text File
|
1992-09-28
|
2KB
|
51 lines
############################################################################
#
# File: ngrams.icn
#
# Subject: Procedure to generate n-grams
#
# Author: Ralph E. Griswold
#
# Date: June 10, 1988
#
###########################################################################
#
# The procedure ngrams(file,n,c,t) generates a tabulation of the n-grams
# in the specified file. If c is non-null, it is used as the set of
# characters from which n-grams are taken (other characters break n-grams).
# The default for c is the upper- and lowercase letters. If t is null,
# the tabulation is given in order of decreasing frequency; otherwise in
# alphabetical order of n-grams.
#
# Note:
#
# The n-grams are kept in a table within the procedure and all n-grams
# are processed before the tabulation is generated. Consequently, this
# procedure is unsuitable if there are very many different n-grams.
#
############################################################################
procedure ngrams(f,i,c,t)
   #
   #  Generate one string per distinct n-gram of length i found in f:
   #  the n-gram followed by its count right-justified in 8 columns.
   #
   #  f  open file (or window) to read from
   #  i  n-gram length; must be convertible to a positive integer
   #  c  characters that may appear in n-grams (default: upper- and
   #     lowercase letters); any other character ends the current run
   #  t  if null, results are produced in order of decreasing count;
   #     otherwise in alphabetical order of n-gram
   #
   #  An invalid argument terminates the program via stop().
   #
   local text, grams, a, count

   if not (integer(i) > 0) then stop("invalid ngrams specification")

   # "type(f) ~== (x | y)" succeeds for every f, because goal-directed
   # evaluation retries the comparison with the other alternative.
   # Test membership with "==" and negate the result instead.
   if not (type(f) == ("file" | "window")) then
      stop("invalid file specification")

   /c := &lcase || &ucase
   if not (c := cset(c)) then stop("invalid cset specification")
   grams := table(0)

   # Read all of the input before scanning.  Scanning the accumulated
   # text after each chunk would tally the earlier chunks again on every
   # read and inflate the counts for input longer than one chunk.
   text := ""
   while text ||:= reads(f,1000)

   # For each maximal run of characters in c, slide a window of width i
   # across the run one position at a time and tally each window.
   text ? while tab(upto(c)) do
      (tab(many(c)) \ 1) ? while grams[move(i)] +:= 1 do
         move(-i + 1)

   if /t then {
      # sort(grams,4) yields [key,value,...] ordered by increasing value;
      # pulling from the end produces the largest counts first.
      a := sort(grams,4)
      while count := pull(a) do
         suspend pull(a) || right(count,8)
      }
   else {
      # sort(grams,3) yields [key,value,...] ordered by key; the repeated
      # alternation ends when get(a) fails on the emptied list.
      a := sort(grams,3)
      suspend |(get(a) || right(get(a),8))
      }
end